package com.skyblue.crawel.web;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;

import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.scheduling.annotation.Async;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;

import com.skyblue.crawel.pojo.DownloadFile;
import com.skyblue.crawel.service.DownloadAsyncService;
import com.skyblue.crawel.utils.DateUtils;
import com.skyblue.crawel.utils.DownloadUtils;

/**
 * REST endpoints that scrape image listing pages with Jsoup and store the
 * images they reference on the local disk.
 *
 * <p>Both endpoints take an inclusive page range ({@code start}..{@code end});
 * the page number is appended to the corresponding base URL. A page that fails
 * to load is logged and skipped so the remaining pages are still processed.
 */
@RestController
@RequestMapping("/crawler")
public class CrawlerController {
	private final Logger logger = LoggerFactory.getLogger(getClass());

	// Base URLs of the listing pages; the page number is appended to each.
	// NOTE(review): kept public and non-final so any existing external reads or
	// writes of these fields keep compiling.
	public static String hahamx_URL = "https://www.hahamx.cn/topic/1/new/";
	public static String zhainanfuli_URL = "http://zhainanba.net/category/zhainanfuli/page/";

	@Autowired
	private DownloadAsyncService downloadAsyncService;

	/**
	 * Downloads the images found on hahamx listing pages {@code start}..{@code end}.
	 *
	 * <p>Example: {@code localhost:8080/crawler/hahamxDownloadImages?start=1&end=1}
	 *
	 * @param start first page number (inclusive)
	 * @param end   last page number (inclusive); nothing happens when {@code end < start}
	 */
	@RequestMapping(path = "/hahamxDownloadImages")
	public void hahamxDownloadImages(int start, int end) {
		for (int i = start; i <= end; i++) {
			String pageUrl = hahamx_URL + i;
			try {
				// Fetch and parse the listing page.
				Document document = Jsoup.connect(pageUrl).get();
				// Collect every <img> tag on the page.
				Elements imgs = document.getElementsByTag("img");
				System.out.println("共检测到下列图片URL：");
				System.out.println("开始下载");
				for (Element element : imgs) {
					// The image location is read from "data-original", not "src".
					String imgSrc = element.attr("data-original");
					if (imgSrc.length() == 0) {
						// No image content.
						continue;
					}
					if (imgSrc.contains("http")) {
						// Image hosted on an external site.
						continue;
					}
					if (imgSrc.contains("gif")) {
						// GIF images are skipped.
						continue;
					}
					// Ask for the "big" rendition instead of "normal"/"middle".
					// Plain String.replace: these are literals, not regex patterns.
					imgSrc = imgSrc.replace("normal", "big").replace("middle", "big");
					// presumably the attribute is protocol-relative ("//host/path") — TODO confirm
					imgSrc = "http:" + imgSrc;
					System.out.println(imgSrc);
					// Save the image locally, one directory per page number.
					DownloadUtils.downImages("E:/youtube/images/hahamx/" + i, imgSrc, null);
				}
				System.out.println("下载完成");
			} catch (IOException e) {
				logger.error("Failed to process hahamx page {}", pageUrl, e);
			}
		}
	}

	/**
	 * Walks zhainanfuli listing pages {@code start}..{@code end} and downloads the
	 * images of every linked post whose anchor title contains "今日妹子图".
	 *
	 * <p>Example: {@code localhost:8080/crawler/zhainanfuliDownloadImages?start=1&end=1}
	 *
	 * <p>The elapsed time since the start of the request is logged after each
	 * successfully processed page.
	 *
	 * @param start first page number (inclusive)
	 * @param end   last page number (inclusive); nothing is fetched when {@code end < start}
	 */
	@RequestMapping(path = "/zhainanfuliDownloadImages")
	public void zhainanfuliDownloadImages(int start, int end) {
		logger.info("开始--------------------------------------------------------------------------");
		long beginMillis = System.currentTimeMillis();
		for (int i = start; i <= end; i++) {
			String pageUrl = zhainanfuli_URL + i;
			try {
				// Fetch and parse the listing page.
				Document document = Jsoup.connect(pageUrl).get();
				// Collect every <a> tag on the page.
				Elements hrefs = document.getElementsByTag("a");
				System.out.println("开始查找");
				for (Element element : hrefs) {
					String title = element.attr("title");
					if (!title.contains("今日妹子图")) {
						// Not a link to an image post.
						continue;
					}
					String url = element.attr("href");
					if (url.length() == 0) {
						// An anchor without a target cannot be followed.
						continue;
					}
					System.out.println(url);
					downloadMeizitu(url);
				}
				System.out.println("下载完成");
				long time = System.currentTimeMillis() - beginMillis;
				logger.info("结束--------------------------------------------------------------------------，时间差为"+time);
			} catch (IOException e) {
				logger.error("Failed to process zhainanfuli page {}", pageUrl, e);
			}
		}
	}

	/**
	 * Downloads every matching image of a single post page through
	 * {@link DownloadAsyncService} and waits for all submitted downloads.
	 *
	 * <p>Only {@code <img>} tags whose title contains "今日妹子图" are downloaded.
	 * If the target directory already exists the post is treated as already
	 * downloaded and skipped.
	 *
	 * @param url address of the post page; also sent as the Referer header
	 */
	private void downloadMeizitu(String url) {
		try {
			// Fetch and parse the post page.
			Document document = Jsoup.connect(url).get();
			// Collect every <img> tag on the page.
			Elements imgs = document.getElementsByTag("img");
			System.out.println("共检测到图片URL："+imgs.size());
			// NOTE(review): DownloadUtils.getHtml presumably derives a per-post
			// directory name from the URL — confirm against its implementation.
			String filePath = "E:/youtube/images/zhainanfuli/"+DownloadUtils.getHtml(url);
			File dir = new File(filePath);
			if (dir.exists()) {
				logger.info("该链接的图片已经下载");
				return;
			}
			System.out.println("开始下载");
			List<Future<DownloadFile>> pending = new ArrayList<Future<DownloadFile>>();
			for (Element element : imgs) {
				String title = element.attr("title");
				if (!title.contains("今日妹子图")) {
					// Not one of the post's images.
					continue;
				}
				String imgSrc = element.attr("src");
				if (imgSrc.length() == 0) {
					// Nothing to download for an <img> without a source.
					continue;
				}
				// A fresh header map per task, so no map is shared between tasks.
				Map<String,String> headers = new HashMap<String,String>();
				headers.put("Referer", url);
				headers.put("User-Agent", "Mozilla/5.0 (iPad; CPU OS 11_0 like Mac OS X) AppleWebKit/604.1.34 (KHTML, like Gecko) Version/11.0 Mobile/15A5341f Safari/604.1");
				DownloadFile downloadFile = new DownloadFile();
				downloadFile.setBeginDate(new Date());
				downloadFile.setFileName(filePath);
				// Hand the download to the async service and keep the handle.
				pending.add(downloadAsyncService.downloadImage(downloadFile, filePath, imgSrc, headers));
			}
			if (awaitDownloads(pending)) {
				logger.info("下载完成----------------------------------------------------");
			}
		} catch (IOException e) {
			logger.error("Failed to download images from {}", url, e);
		}
	}

	/**
	 * Blocks until every submitted download has finished, logging the duration
	 * of each one. A failed download is logged and does not stop the wait for
	 * the others.
	 *
	 * @param pending handles of the submitted downloads
	 * @return {@code true} when all downloads were waited for, {@code false}
	 *         when the waiting thread was interrupted
	 */
	private boolean awaitDownloads(List<Future<DownloadFile>> pending) {
		for (Future<DownloadFile> future : pending) {
			try {
				// get() blocks until the task completes; no polling on isDone().
				DownloadFile finished = future.get();
				logger.info(finished.getFileName()+"耗时"+finished.getDuration()+"毫秒");
			} catch (InterruptedException e) {
				// Restore the flag so callers up the stack can see the interrupt.
				Thread.currentThread().interrupt();
				logger.warn("Interrupted while waiting for image downloads", e);
				return false;
			} catch (ExecutionException e) {
				logger.error("Image download task failed", e);
			}
		}
		return true;
	}
}
